// FIXME: temporary workaround for __syncthreads.
// The function-like macro below has an empty replacement list, so every
// `__syncthreads()` call seen after this point (including inside headers
// included later) expands to nothing and no barrier call is emitted.
// NOTE(review): code that depends on a barrier between threads is presumably
// not safe under this definition — confirm against the target backend.
#define __syncthreads()

// #include "hip/hip_runtime_api_4_cuda_runtime.h"
#include "hip/hip_host_runtime_api.h"


// Module and function handle types.
#define cuFunction         hipFunction_t
#define cuModule           hipModule_t

// Stream handle, status type, success value, and error-string lookup.
#define cudaStream_t       hipStream_t
#define cudaError_t        hipError_t
#define cudaSuccess        hipSuccess
#define cudaGetErrorString hipGetErrorString

// NOTE: mlvm is unified memory, so the device allocation names (CUDA and HIP
// spellings alike) are redirected to the host allocation entry points.
// FIXME mlvm: how to auto Register/Unregister after the switch to hipHostMalloc?
// Direct mappings kept for reference:
//   #define cudaMalloc hipMalloc
//   #define cudaFree   hipFree

// Allocation: everything resolves to hipHostMalloc.
#define cudaMalloc hipHostMalloc
#define hipMalloc hipHostMalloc
#define cudaHostMalloc hipHostMalloc

// Release: everything resolves to hipHostFree.
#define cudaFree hipHostFree
#define hipFree hipHostFree
#define cudaHostFree hipHostFree
// hipFreeHost is deprecated in HIP in favour of hipHostFree; map cudaFreeHost
// straight to the supported name so all free paths agree.
#define cudaFreeHost hipHostFree

// Device enumeration and selection.
#define cudaGetDeviceCount hipGetDeviceCount
#define cudaGetDevice hipGetDevice
#define cudaSetDevice hipSetDevice
#define cudaSetDeviceFlags hipSetDeviceFlags
#define cudaChooseDevice hipChooseDevice

// Device queries.
#define cudaGetDeviceProperties hipGetDeviceProperties
#define cudaDeviceGetAttribute hipDeviceGetAttribute
#define cudaDeviceGetLimit hipDeviceGetLimit

// Device-wide synchronisation and reset.
#define cudaDeviceSynchronize hipDeviceSynchronize
#define cudaDeviceReset hipDeviceReset

// Cache and shared-memory configuration.
#define cudaDeviceGetCacheConfig hipDeviceGetCacheConfig
#define cudaDeviceSetCacheConfig hipDeviceSetCacheConfig
#define cudaDeviceGetSharedMemConfig hipDeviceGetSharedMemConfig
#define cudaDeviceSetSharedMemConfig hipDeviceSetSharedMemConfig

// Per-function attributes and cache preference.
#define cudaFuncGetAttributes hipFuncGetAttributes
#define cudaFuncSetCacheConfig hipFuncSetCacheConfig

// Error inspection.
#define cudaGetLastError hipGetLastError
#define cudaPeekAtLastError hipPeekAtLastError
#define cudaGetErrorName hipGetErrorName
#define cudaGetErrorString hipGetErrorString
// Stream creation and destruction.
#define cudaStreamCreate hipStreamCreate
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
#define cudaStreamCreateWithPriority hipStreamCreateWithPriority
#define cudaDeviceGetStreamPriorityRange hipDeviceGetStreamPriorityRange
#define cudaStreamDestroy hipStreamDestroy

// Stream status, waiting, and flags.
#define cudaStreamQuery hipStreamQuery
#define cudaStreamSynchronize hipStreamSynchronize
#define cudaStreamWaitEvent hipStreamWaitEvent
#define cudaStreamGetFlags hipStreamGetFlags

// Stream callbacks: callback type and registration call.
#define cudaStreamCallback_t hipStreamCallback_t
#define cudaStreamAddCallback hipStreamAddCallback
// Event lifetime.
#define cudaEventCreate hipEventCreate
#define cudaEventCreateWithFlags hipEventCreateWithFlags
#define cudaEventDestroy hipEventDestroy

// Event recording, completion checks, and timing.
#define cudaEventRecord hipEventRecord
#define cudaEventQuery hipEventQuery
#define cudaEventSynchronize hipEventSynchronize
#define cudaEventElapsedTime hipEventElapsedTime
// Pointer introspection.
#define cudaPointerGetAttributes hipPointerGetAttributes

// Host allocation. hipMallocHost is deprecated in HIP in favour of
// hipHostMalloc, so every CUDA-side spelling resolves to hipHostMalloc.
// NOTE(review): two-argument call sites rely on hipHostMalloc accepting a
// defaulted flags argument, exactly as the cudaMalloc mapping in this header
// already does — confirm for C (non-C++) translation units.
// Direct mapping kept for reference:
//   #define cudaHostAlloc hipHostAlloc
#define cudaMallocHost hipHostMalloc
#define cudaHostMalloc hipHostMalloc
#define cudaHostAlloc hipHostMalloc

// Queries and registration for host memory.
#define cudaHostGetDevicePointer hipHostGetDevicePointer
#define cudaHostGetFlags hipHostGetFlags
#define cudaHostRegister hipHostRegister
#define cudaHostUnregister hipHostUnregister

// Pitched allocation.
#define cudaMallocPitch hipMallocPitch


// Blocking copies.
#define cudaMemcpy hipMemcpy
#define cudaMemcpyDtoD hipMemcpyDtoD
#define cudaMemcpyDtoH hipMemcpyDtoH
#define cudaMemcpyHtoD hipMemcpyHtoD

// Async counterparts of the directional copies.
#define cudaMemcpyDtoDAsync hipMemcpyDtoDAsync
#define cudaMemcpyDtoHAsync hipMemcpyDtoHAsync
#define cudaMemcpyHtoDAsync hipMemcpyHtoDAsync

// Symbol address and size lookup.
#define cudaGetSymbolAddress hipGetSymbolAddress
#define cudaGetSymbolSize hipGetSymbolSize

// FIXME: the HIP symbol-copy names are themselves redirected into the
// hip_impl namespace. A macro name is not re-expanded inside its own
// replacement, so each of these resolves exactly once.
#define hipMemcpyFromSymbol hip_impl::hipMemcpyFromSymbol
#define hipMemcpyToSymbol hip_impl::hipMemcpyToSymbol

// The synchronous CUDA names pick up the hip_impl redirection above; the
// Async variants map to the plain HIP names.
#define cudaMemcpyFromSymbol hipMemcpyFromSymbol
#define cudaMemcpyToSymbol hipMemcpyToSymbol
#define cudaMemcpyFromSymbolAsync hipMemcpyFromSymbolAsync
#define cudaMemcpyToSymbolAsync hipMemcpyToSymbolAsync

// Generic async copy.
#define cudaMemcpyAsync hipMemcpyAsync

// Linear memset, blocking and async.
#define cudaMemset hipMemset
#define cudaMemsetAsync hipMemsetAsync
#define cudaMemsetD8 hipMemsetD8
#define cudaMemsetD32 hipMemsetD32
#define cudaMemsetD32Async hipMemsetD32Async

// 2D and 3D memset, blocking and async.
#define cudaMemset2D hipMemset2D
#define cudaMemset2DAsync hipMemset2DAsync
#define cudaMemset3D hipMemset3D
#define cudaMemset3DAsync hipMemset3DAsync

// Memory usage queries.
#define cudaMemGetInfo hipMemGetInfo
#define cudaMemPtrGetInfo hipMemPtrGetInfo
// Array and 3D allocation / release.
#define cudaMallocArray hipMallocArray
#define cudaMalloc3DArray hipMalloc3DArray
#define cudaMalloc3D hipMalloc3D
#define cudaArrayCreate hipArrayCreate
#define cudaArray3DCreate hipArray3DCreate
#define cudaFreeArray hipFreeArray

// 2D copies.
#define cudaMemcpy2D hipMemcpy2D
#define cudaMemcpy2DAsync hipMemcpy2DAsync
#define cudaMemcpy2DToArray hipMemcpy2DToArray
#define cudaMemcpyParam2D hipMemcpyParam2D

// Array copies and 3D copy.
#define cudaMemcpyToArray hipMemcpyToArray
#define cudaMemcpyFromArray hipMemcpyFromArray
#define cudaMemcpy3D hipMemcpy3D
// Peer access control.
#define cudaDeviceCanAccessPeer hipDeviceCanAccessPeer
#define cudaDeviceDisablePeerAccess hipDeviceDisablePeerAccess
#define cudaDeviceEnablePeerAccess hipDeviceEnablePeerAccess

// Address-range lookup.
#define cudaMemGetAddressRange hipMemGetAddressRange

// Peer copies, blocking and async.
#define cudaMemcpyPeer hipMemcpyPeer
#define cudaMemcpyPeerAsync hipMemcpyPeerAsync
// Initialisation and version queries.
#define cudaInit hipInit
#define cudaDriverGetVersion hipDriverGetVersion
#define cudaRuntimeGetVersion hipRuntimeGetVersion

// Context lifetime and the current-context stack.
#define cudaCtxCreate hipCtxCreate
#define cudaCtxDestroy hipCtxDestroy
#define cudaCtxPushCurrent hipCtxPushCurrent
#define cudaCtxPopCurrent hipCtxPopCurrent
#define cudaCtxSetCurrent hipCtxSetCurrent
#define cudaCtxGetCurrent hipCtxGetCurrent

// Context queries and configuration.
#define cudaCtxGetDevice hipCtxGetDevice
#define cudaCtxGetApiVersion hipCtxGetApiVersion
#define cudaCtxGetFlags hipCtxGetFlags
#define cudaCtxGetCacheConfig hipCtxGetCacheConfig
#define cudaCtxSetCacheConfig hipCtxSetCacheConfig
#define cudaCtxGetSharedMemConfig hipCtxGetSharedMemConfig
#define cudaCtxSetSharedMemConfig hipCtxSetSharedMemConfig
#define cudaCtxSynchronize hipCtxSynchronize

// Context-level peer access.
#define cudaCtxEnablePeerAccess hipCtxEnablePeerAccess
#define cudaCtxDisablePeerAccess hipCtxDisablePeerAccess

// Primary context management.
#define cudaDevicePrimaryCtxGetState hipDevicePrimaryCtxGetState
#define cudaDevicePrimaryCtxSetFlags hipDevicePrimaryCtxSetFlags
#define cudaDevicePrimaryCtxRetain hipDevicePrimaryCtxRetain
#define cudaDevicePrimaryCtxRelease hipDevicePrimaryCtxRelease
#define cudaDevicePrimaryCtxReset hipDevicePrimaryCtxReset

// Device handle lookup and identification.
#define cudaDeviceGet hipDeviceGet
#define cudaDeviceGetName hipDeviceGetName
#define cudaDeviceComputeCapability hipDeviceComputeCapability
#define cudaDeviceTotalMem hipDeviceTotalMem
#define cudaDeviceGetPCIBusId hipDeviceGetPCIBusId
#define cudaDeviceGetByPCIBusId hipDeviceGetByPCIBusId
// Module loading and unloading.
#define cuModuleLoad         hipModuleLoad
#define cuModuleLoadData     hipModuleLoadData
#define cuModuleLoadDataEx   hipModuleLoadDataEx
#define cuModuleUnload       hipModuleUnload

// Lookups inside a loaded module, and kernel launch.
#define cuModuleGetFunction  hipModuleGetFunction
#define cuModuleGetGlobal    hipModuleGetGlobal
#define cuModuleGetTexRef    hipModuleGetTexRef
#define cuModuleLaunchKernel hipModuleLaunchKernel
// Profiler control.
#define cudaProfilerStart hipProfilerStart
#define cudaProfilerStop hipProfilerStop

// IPC memory handles.
#define cudaIpcGetMemHandle hipIpcGetMemHandle
#define cudaIpcOpenMemHandle hipIpcOpenMemHandle
#define cudaIpcCloseMemHandle hipIpcCloseMemHandle

// Launch configuration, argument setup, and launch by pointer.
#define cudaConfigureCall hipConfigureCall
#define cudaSetupArgument hipSetupArgument
#define cudaLaunchByPtr hipLaunchByPtr
// Texture binding. The ihip*Impl helper names that appeared as commented-out
// placeholders (ihipBindTextureImpl, ihipBindTexture2DImpl,
// ihipBindTextureToArrayImpl, ihipUnbindTextureImpl) remain unmapped.
#define cudaBindTexture hipBindTexture
#define cudaBindTexture2D hipBindTexture2D
#define cudaBindTextureToArray hipBindTextureToArray
#define cudaBindTextureToMipmappedArray hipBindTextureToMipmappedArray
#define cudaUnbindTexture hipUnbindTexture

// Channel descriptor and texture-reference queries.
#define cudaGetChannelDesc hipGetChannelDesc
#define cudaGetTextureAlignmentOffset hipGetTextureAlignmentOffset
#define cudaGetTextureReference hipGetTextureReference

// Texture objects.
#define cudaCreateTextureObject hipCreateTextureObject
#define cudaDestroyTextureObject hipDestroyTextureObject
#define cudaGetTextureObjectResourceDesc hipGetTextureObjectResourceDesc
#define cudaGetTextureObjectResourceViewDesc hipGetTextureObjectResourceViewDesc
#define cudaGetTextureObjectTextureDesc hipGetTextureObjectTextureDesc

// Texture-reference setters.
#define cudaTexRefSetArray hipTexRefSetArray
#define cudaTexRefSetAddress hipTexRefSetAddress
#define cudaTexRefSetAddress2D hipTexRefSetAddress2D
#define cudaTexRefSetAddressMode hipTexRefSetAddressMode
#define cudaTexRefSetFilterMode hipTexRefSetFilterMode
#define cudaTexRefSetFlags hipTexRefSetFlags
#define cudaTexRefSetFormat hipTexRefSetFormat


// Device description types; both cudaDeviceProp spellings resolve to
// hipDeviceProp_t.
#define cudaDeviceArch_t hipDeviceArch_t
#define cudaDeviceProp hipDeviceProp_t
#define cudaDeviceProp_t hipDeviceProp_t

// Memory-type enum, its enumerators, and the pointer attribute type.
#define cudaMemoryType hipMemoryType
#define cudaMemoryTypeArray hipMemoryTypeArray
#define cudaMemoryTypeDevice hipMemoryTypeDevice
#define cudaMemoryTypeHost hipMemoryTypeHost
#define cudaMemoryTypeUnified hipMemoryTypeUnified
#define cudaPointerAttribute_t hipPointerAttribute_t
// Status type and success value.
#define cudaError_t hipError_t
#define cudaSuccess hipSuccess

// Initialisation and profiler state.
#define cudaErrorOutOfMemory hipErrorOutOfMemory
#define cudaErrorNotInitialized hipErrorNotInitialized
#define cudaErrorDeinitialized hipErrorDeinitialized
#define cudaErrorProfilerDisabled hipErrorProfilerDisabled
#define cudaErrorProfilerNotInitialized hipErrorProfilerNotInitialized
#define cudaErrorProfilerAlreadyStarted hipErrorProfilerAlreadyStarted
#define cudaErrorProfilerAlreadyStopped hipErrorProfilerAlreadyStopped
#define cudaErrorInsufficientDriver hipErrorInsufficientDriver

// Images, contexts, and mapping.
#define cudaErrorInvalidImage hipErrorInvalidImage
#define cudaErrorInvalidContext hipErrorInvalidContext
#define cudaErrorContextAlreadyCurrent hipErrorContextAlreadyCurrent
#define cudaErrorMapFailed hipErrorMapFailed
#define cudaErrorUnmapFailed hipErrorUnmapFailed
#define cudaErrorArrayIsMapped hipErrorArrayIsMapped
#define cudaErrorAlreadyMapped hipErrorAlreadyMapped
#define cudaErrorNoBinaryForGpu hipErrorNoBinaryForGpu
#define cudaErrorAlreadyAcquired hipErrorAlreadyAcquired
#define cudaErrorNotMapped hipErrorNotMapped
#define cudaErrorNotMappedAsArray hipErrorNotMappedAsArray
#define cudaErrorNotMappedAsPointer hipErrorNotMappedAsPointer
#define cudaErrorECCNotCorrectable hipErrorECCNotCorrectable
#define cudaErrorUnsupportedLimit hipErrorUnsupportedLimit
#define cudaErrorContextAlreadyInUse hipErrorContextAlreadyInUse
#define cudaErrorPeerAccessUnsupported hipErrorPeerAccessUnsupported

// Kernel files, sources, shared objects, and handles.
#define cudaErrorInvalidKernelFile hipErrorInvalidKernelFile
#define cudaErrorInvalidGraphicsContext hipErrorInvalidGraphicsContext
#define cudaErrorInvalidSource hipErrorInvalidSource
#define cudaErrorFileNotFound hipErrorFileNotFound
#define cudaErrorSharedObjectSymbolNotFound hipErrorSharedObjectSymbolNotFound
#define cudaErrorSharedObjectInitFailed hipErrorSharedObjectInitFailed
#define cudaErrorOperatingSystem hipErrorOperatingSystem
#define cudaErrorSetOnActiveProcess hipErrorSetOnActiveProcess
#define cudaErrorInvalidHandle hipErrorInvalidHandle
#define cudaErrorNotFound hipErrorNotFound
#define cudaErrorIllegalAddress hipErrorIllegalAddress
#define cudaErrorInvalidSymbol hipErrorInvalidSymbol

// Configuration, allocation, and launch.
#define cudaErrorMissingConfiguration hipErrorMissingConfiguration
#define cudaErrorMemoryAllocation hipErrorMemoryAllocation
#define cudaErrorInitializationError hipErrorInitializationError
#define cudaErrorLaunchFailure hipErrorLaunchFailure
#define cudaErrorPriorLaunchFailure hipErrorPriorLaunchFailure
#define cudaErrorLaunchTimeOut hipErrorLaunchTimeOut
#define cudaErrorLaunchOutOfResources hipErrorLaunchOutOfResources

// Invalid arguments.
#define cudaErrorInvalidDeviceFunction hipErrorInvalidDeviceFunction
#define cudaErrorInvalidConfiguration hipErrorInvalidConfiguration
#define cudaErrorInvalidDevice hipErrorInvalidDevice
#define cudaErrorInvalidValue hipErrorInvalidValue
#define cudaErrorInvalidDevicePointer hipErrorInvalidDevicePointer
#define cudaErrorInvalidMemcpyDirection hipErrorInvalidMemcpyDirection
#define cudaErrorUnknown hipErrorUnknown
#define cudaErrorInvalidResourceHandle hipErrorInvalidResourceHandle
#define cudaErrorNotReady hipErrorNotReady
#define cudaErrorNoDevice hipErrorNoDevice

// Peer access, runtime, host registration, and the remaining codes.
#define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
#define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
#define cudaErrorRuntimeMemory hipErrorRuntimeMemory
#define cudaErrorRuntimeOther hipErrorRuntimeOther
#define cudaErrorHostMemoryAlreadyRegistered hipErrorHostMemoryAlreadyRegistered
#define cudaErrorHostMemoryNotRegistered hipErrorHostMemoryNotRegistered
#define cudaErrorMapBufferObjectFailed hipErrorMapBufferObjectFailed
#define cudaErrorAssert hipErrorAssert
#define cudaErrorTbd hipErrorTbd
// Attribute enum type.
#define cudaDeviceAttribute_t hipDeviceAttribute_t

// Launch geometry limits.
#define cudaDeviceAttributeMaxThreadsPerBlock hipDeviceAttributeMaxThreadsPerBlock
#define cudaDeviceAttributeMaxBlockDimX hipDeviceAttributeMaxBlockDimX
#define cudaDeviceAttributeMaxBlockDimY hipDeviceAttributeMaxBlockDimY
#define cudaDeviceAttributeMaxBlockDimZ hipDeviceAttributeMaxBlockDimZ
#define cudaDeviceAttributeMaxGridDimX hipDeviceAttributeMaxGridDimX
#define cudaDeviceAttributeMaxGridDimY hipDeviceAttributeMaxGridDimY
#define cudaDeviceAttributeMaxGridDimZ hipDeviceAttributeMaxGridDimZ
#define cudaDeviceAttributeWarpSize hipDeviceAttributeWarpSize
#define cudaDeviceAttributeMaxThreadsPerMultiProcessor hipDeviceAttributeMaxThreadsPerMultiProcessor

// Memory, register, and cache sizes.
#define cudaDeviceAttributeMaxSharedMemoryPerBlock hipDeviceAttributeMaxSharedMemoryPerBlock
#define cudaDeviceAttributeMaxSharedMemoryPerMultiprocessor hipDeviceAttributeMaxSharedMemoryPerMultiprocessor
#define cudaDeviceAttributeTotalConstantMemory hipDeviceAttributeTotalConstantMemory
#define cudaDeviceAttributeMaxRegistersPerBlock hipDeviceAttributeMaxRegistersPerBlock
#define cudaDeviceAttributeL2CacheSize hipDeviceAttributeL2CacheSize

// Clocks and memory bus.
#define cudaDeviceAttributeClockRate hipDeviceAttributeClockRate
#define cudaDeviceAttributeMemoryClockRate hipDeviceAttributeMemoryClockRate
#define cudaDeviceAttributeMemoryBusWidth hipDeviceAttributeMemoryBusWidth

// Compute capability and execution features.
#define cudaDeviceAttributeMultiprocessorCount hipDeviceAttributeMultiprocessorCount
#define cudaDeviceAttributeComputeMode hipDeviceAttributeComputeMode
#define cudaDeviceAttributeComputeCapabilityMajor hipDeviceAttributeComputeCapabilityMajor
#define cudaDeviceAttributeComputeCapabilityMinor hipDeviceAttributeComputeCapabilityMinor
#define cudaDeviceAttributeConcurrentKernels hipDeviceAttributeConcurrentKernels

// Bus identity and board layout.
#define cudaDeviceAttributePciBusId hipDeviceAttributePciBusId
#define cudaDeviceAttributePciDeviceId hipDeviceAttributePciDeviceId
#define cudaDeviceAttributeIsMultiGpuBoard hipDeviceAttributeIsMultiGpuBoard
#define cudaDeviceAttributeIntegrated hipDeviceAttributeIntegrated
// Compute-mode enum and its enumerators.
#define cudaComputeMode                 hipComputeMode
#define cudaComputeModeDefault          hipComputeModeDefault
#define cudaComputeModeProhibited       hipComputeModeProhibited
#define cudaComputeModeExclusive        hipComputeModeExclusive
#define cudaComputeModeExclusiveProcess hipComputeModeExclusiveProcess


// HIP-style coordinate names spelled in terms of the CUDA-style built-in
// variables (gridDim, blockDim, blockIdx, threadIdx).

// Grid extent.
#define hipGridDim_x   gridDim.x
#define hipGridDim_y   gridDim.y
#define hipGridDim_z   gridDim.z

// Block extent.
#define hipBlockDim_x  blockDim.x
#define hipBlockDim_y  blockDim.y
#define hipBlockDim_z  blockDim.z

// Block index.
#define hipBlockIdx_x  blockIdx.x
#define hipBlockIdx_y  blockIdx.y
#define hipBlockIdx_z  blockIdx.z

// Thread index.
#define hipThreadIdx_x threadIdx.x
#define hipThreadIdx_y threadIdx.y
#define hipThreadIdx_z threadIdx.z

// The names below come from driver_type.h.

// Array handles and array format.
#define cudaArray hipArray
#define cudaArray_t hipArray_t
#define cudaArray_const_t hipArray_const_t
#define cudaArray_Format hipArray_Format

// Mipmapped array handles.
#define cudaMipmappedArray hipMipmappedArray
#define cudaMipmappedArray_t hipMipmappedArray_t
#define cudaMipmappedArray_const_t hipMipmappedArray_const_t

// Channel format descriptor, kind enum, and enumerators.
#define cudaChannelFormatDesc hipChannelFormatDesc
#define cudaChannelFormatKind hipChannelFormatKind
#define cudaChannelFormatKindFloat hipChannelFormatKindFloat
#define cudaChannelFormatKindNone hipChannelFormatKindNone
#define cudaChannelFormatKindSigned hipChannelFormatKindSigned
#define cudaChannelFormatKindUnsigned hipChannelFormatKindUnsigned

// Device pointer, extent, pitched pointer, and position types.
#define cudaDeviceptr_t hipDeviceptr_t
#define cudaExtent hipExtent
#define cudaPitchedPtr hipPitchedPtr
#define cudaPos hipPos

// Copy parameter structs.
#define cuda_Memcpy2D hip_Memcpy2D
#define cudaMemcpy3DParms hipMemcpy3DParms

// Copy-direction enum and enumerators.
#define cudaMemcpyKind hipMemcpyKind
#define cudaMemcpyDefault hipMemcpyDefault
#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemcpyHostToHost hipMemcpyHostToHost

// Memory-type enum.
#define cudaMemoryType hipMemoryType
// Resource descriptor and resource-view descriptor / format types.
#define cudaResourceDesc hipResourceDesc
#define cudaResourceViewDesc hipResourceViewDesc
#define cudaResourceViewFormat hipResourceViewFormat

// Resource-type enum and enumerators.
#define cudaResourceType hipResourceType
#define cudaResourceTypeArray hipResourceTypeArray
#define cudaResourceTypeLinear hipResourceTypeLinear
#define cudaResourceTypeMipmappedArray hipResourceTypeMipmappedArray
#define cudaResourceTypePitch2D hipResourceTypePitch2D
// Resource-view format enumerators.

// No format.
#define cudaResViewFormatNone hipResViewFormatNone

// Float and half formats.
#define cudaResViewFormatFloat1 hipResViewFormatFloat1
#define cudaResViewFormatFloat2 hipResViewFormatFloat2
#define cudaResViewFormatFloat4 hipResViewFormatFloat4
#define cudaResViewFormatHalf1 hipResViewFormatHalf1
#define cudaResViewFormatHalf2 hipResViewFormatHalf2
#define cudaResViewFormatHalf4 hipResViewFormatHalf4

// Signed integer formats.
#define cudaResViewFormatSignedChar1 hipResViewFormatSignedChar1
#define cudaResViewFormatSignedChar2 hipResViewFormatSignedChar2
#define cudaResViewFormatSignedChar4 hipResViewFormatSignedChar4
#define cudaResViewFormatSignedShort1 hipResViewFormatSignedShort1
#define cudaResViewFormatSignedShort2 hipResViewFormatSignedShort2
#define cudaResViewFormatSignedShort4 hipResViewFormatSignedShort4
#define cudaResViewFormatSignedInt1 hipResViewFormatSignedInt1
#define cudaResViewFormatSignedInt2 hipResViewFormatSignedInt2
#define cudaResViewFormatSignedInt4 hipResViewFormatSignedInt4

// Unsigned integer formats.
#define cudaResViewFormatUnsignedChar1 hipResViewFormatUnsignedChar1
#define cudaResViewFormatUnsignedChar2 hipResViewFormatUnsignedChar2
#define cudaResViewFormatUnsignedChar4 hipResViewFormatUnsignedChar4
#define cudaResViewFormatUnsignedShort1 hipResViewFormatUnsignedShort1
#define cudaResViewFormatUnsignedShort2 hipResViewFormatUnsignedShort2
#define cudaResViewFormatUnsignedShort4 hipResViewFormatUnsignedShort4
#define cudaResViewFormatUnsignedInt1 hipResViewFormatUnsignedInt1
#define cudaResViewFormatUnsignedInt2 hipResViewFormatUnsignedInt2
#define cudaResViewFormatUnsignedInt4 hipResViewFormatUnsignedInt4

// Signed block-compressed formats.
#define cudaResViewFormatSignedBlockCompressed4 hipResViewFormatSignedBlockCompressed4
#define cudaResViewFormatSignedBlockCompressed5 hipResViewFormatSignedBlockCompressed5
#define cudaResViewFormatSignedBlockCompressed6H hipResViewFormatSignedBlockCompressed6H

// Unsigned block-compressed formats.
#define cudaResViewFormatUnsignedBlockCompressed1 hipResViewFormatUnsignedBlockCompressed1
#define cudaResViewFormatUnsignedBlockCompressed2 hipResViewFormatUnsignedBlockCompressed2
#define cudaResViewFormatUnsignedBlockCompressed3 hipResViewFormatUnsignedBlockCompressed3
#define cudaResViewFormatUnsignedBlockCompressed4 hipResViewFormatUnsignedBlockCompressed4
#define cudaResViewFormatUnsignedBlockCompressed5 hipResViewFormatUnsignedBlockCompressed5
#define cudaResViewFormatUnsignedBlockCompressed6H hipResViewFormatUnsignedBlockCompressed6H
#define cudaResViewFormatUnsignedBlockCompressed7 hipResViewFormatUnsignedBlockCompressed7

// Event handle type.
#define cudaEvent_t hipEvent_t

// CUDART_CB is defined with an empty replacement list, so it disappears from
// any declaration that uses it.
#define CUDART_CB

// Kernel builtins.
// Disabled alternative that would map __syncthreads() onto hc_barrier:
// #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
